---
title: "Issue Competition in Parliamentary Speeches - Analysis"
author: "Christoph Ivanusch"
date: ""
output:
  html_document:
    df_print: paged

---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Document-wide chunk defaults: echo the code of each chunk and keep
# warnings out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE
)
```


```{r preparation, echo=FALSE, warning=FALSE, message=FALSE}
# load packages
library(dplyr) #for data wrangling
library(tidyr) #for data wrangling
library(stargazer) #for tables
library(psych) #additional functions for summary statistics
library(ggplot2) #for data visualizations
library(GGally) #for additional functions in data viz
library(kableExtra) #for tables
library(broom) #for tidy regression object
library(xtable) #for latex tables
library(xlsx)
library(texreg)
library(lfe) #for regression models with fixed effects and clustered standard errors

# Read the final analysis data set from the working directory; the tables,
# figures and models of the main paper below are all built from `df`.
df <- readRDS(file = "Analysis_Data.RDS")

```

# Main Paper

## Table 1

Party issue agendas for legislative period 25:

```{r agendas period 25, echo=FALSE}

# Build the data behind Table 1: one row per issue and one column per party
# for legislative period 25.
agendas_25 <- filter(df[, 1:4], gp == 25)
agendas_25$gp <- NULL  # constant after filtering; drop it before reshaping
agendas_25_reshape <- reshape(agendas_25,
                              idvar = "issue",
                              timevar = "party",
                              direction = "wide")

# Round every non-id column rather than a hard-coded 2:7 range, so the table
# stays correct if the number of parties in the period differs.
party_cols <- setdiff(colnames(agendas_25_reshape), "issue")
agendas_25_reshape[party_cols] <- round(agendas_25_reshape[party_cols],
                                        digits = 2)

# Strip the "speech_percent." prefix from the reshaped column names.
colnames(agendas_25_reshape) <- gsub("speech_percent\\.", "",
                                     colnames(agendas_25_reshape))

```

```{r table 1, warning=FALSE}

# Render the period-25 agenda table as HTML without stretching it to the
# full page width. `FALSE` is spelled out because `F` is an ordinary,
# reassignable variable.
kable(agendas_25_reshape, format = "html") %>%
  kable_styling(full_width = FALSE)
```

## Figure 1

Box plot for x="Issue Ownership" and y="Speech Issue Variation":

```{r figure 1, warning=FALSE}

# Treat issue ownership as categorical so each level gets its own box.
# NOTE: this modifies `df` in place, so later chunks that use `df` see IO as
# a factor as well.
df$IO <- as.factor(df$IO)

# Build the box plot once and reuse it for the document and the PNG file.
figure_1 <- ggplot(df, aes(x = IO, y = speech_diff_to_median, group = IO)) +
  geom_boxplot(outlier.color = "red") +
  theme_bw() +
  labs(x = "Issue Ownership", y = "Speech Issue Variation")

# Show the figure in the rendered document.
figure_1

# Write the same figure to disk. The png device must be closed with
# dev.off(); otherwise it stays open and the file may be left incomplete.
png("Figure_1.png")
print(figure_1)
invisible(dev.off())

```

## Figure 2

Plot for x="Manifesto Issue Variation" and y="Speech Issue Variation":

```{r figure 2, warning=FALSE}

# Scatter plot of manifesto vs. speech issue variation with a fitted linear
# trend. Built once and reused for the document and the PNG file.
figure_2 <- ggplot(df, aes(x = manifesto_diff_to_median,
                           y = speech_diff_to_median)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Manifesto Issue Variation", y = "Speech Issue Variation")

# Show the figure in the rendered document.
figure_2

# Write the same figure to disk. The png device must be closed with
# dev.off(); otherwise it stays open and the file may be left incomplete.
png("Figure_2.png")
print(figure_2)
invisible(dev.off())

```

## Table 2

Regression models:

```{r table 2, warning=FALSE}

# Main regression models. The felm() formula has four parts separated by "|":
# covariates | fixed effects | instruments | cluster variables. Here there are
# no fixed effects and no instruments (both 0); standard errors are clustered
# by party, issue and legislative period (gp).

# Model 1: issue ownership and its interaction with government participation
reg1 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | 0 | 0 | party + issue + gp,
             data = df)

# Model 2: manifesto issue variation and its interaction with government
reg2 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | 0 | 0 | party + issue + gp,
             data = df)

# Model 3: both predictors with both interactions
reg3 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | 0 | 0 | party + issue + gp,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter.
stargazer(list(reg1, reg2, reg3),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          notes = "Standard errors clustered by party, issue and legislative period.")

```

# Appendix

## Appendix D

```{r Appendix D}

# Run the validation script. The code below expects an object `acc` to exist
# afterwards (presumably created by that script — TODO confirm).
source("04_Validation.R")

# Turn the row names of `acc` into a regular first column and label all
# eight columns.
df_acc <- cbind(rownames(acc), acc)
colnames(df_acc) <- c("Issue",
                      "TP", "FP", "TN", "FN",
                      "Precision", "Recall", "F1")

# Sort by issue and render as an HTML table without row names.
df_acc <- df_acc %>%
  arrange(Issue)
kable(df_acc, format = "html", row.names = FALSE)

```

## Appendix F

The following tables show the party issue agendas for legislative periods 22, 23, 24 and 26.

```{r Appendix F}

# Reload the analysis data so this chunk starts from the data as stored on
# disk (earlier chunks modify `df` and source an external script).
df <- readRDS("Analysis_Data.RDS")

# Build the issue-by-party agenda table for one legislative period.
#   data:      data frame whose first four columns contain `gp`, `issue`,
#              `party` and the speech share
#   period_id: value of `gp` to keep
# Returns a wide data frame with one row per issue and one column per party,
# values rounded to two digits.
build_agenda_table <- function(data, period_id) {
  # `!!` injects the argument's value so it can never be confused with a
  # column of `data`.
  agenda <- filter(data[, 1:4], gp == !!period_id)
  agenda$gp <- NULL  # constant after filtering; drop it before reshaping
  agenda_wide <- reshape(agenda,
                         idvar = "issue",
                         timevar = "party",
                         direction = "wide")

  # Round all party columns; the number of parties differs between periods,
  # so no fixed column range is assumed.
  party_cols <- setdiff(colnames(agenda_wide), "issue")
  agenda_wide[party_cols] <- round(agenda_wide[party_cols], digits = 2)

  # Strip the "speech_percent." prefix from the reshaped column names.
  colnames(agenda_wide) <- gsub("speech_percent\\.", "", colnames(agenda_wide))
  agenda_wide
}

agendas_22_reshape <- build_agenda_table(df, 22)
agendas_23_reshape <- build_agenda_table(df, 23)
agendas_24_reshape <- build_agenda_table(df, 24)
agendas_26_reshape <- build_agenda_table(df, 26)

# One HTML table per legislative period, in chronological order.
kable(agendas_22_reshape, format = "html") %>%
  kable_styling(full_width = FALSE)

kable(agendas_23_reshape, format = "html") %>%
  kable_styling(full_width = FALSE)

kable(agendas_24_reshape, format = "html") %>%
  kable_styling(full_width = FALSE)

kable(agendas_26_reshape, format = "html") %>%
  kable_styling(full_width = FALSE)

```

## Appendix G

Figure 2 of the main paper without the outlier at (38/3.9).

```{r Appendix G, warning=FALSE}

# Remove the outlier: keep only observations with manifesto issue variation
# below 30 (rows with a missing value are dropped by filter() as well).
df_no_outliers <- filter(df, manifesto_diff_to_median < 30)

# Same plot as Figure 2, on the data without the outlier. Built once and
# reused for the document and the PNG file.
figure_appendix_g <- ggplot(df_no_outliers,
                            aes(x = manifesto_diff_to_median,
                                y = speech_diff_to_median)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Manifesto Issue Variation", y = "Speech Issue Variation")

# Show the figure in the rendered document.
figure_appendix_g

# Write the same figure to disk. The png device must be closed with
# dev.off(); otherwise it stays open and the file may be left incomplete.
# NOTE(review): the file name says "Appendix_F" although this is Appendix G;
# kept unchanged in case other files reference it — confirm and rename.
png("Figure_Appendix_F.png")
print(figure_appendix_g)
invisible(dev.off())

```

## Appendix H

In the regression models reported in Appendix H, the outlier at (38/3.9) is excluded.

```{r Appendix H, warning=FALSE}

# Same three models as Table 2, estimated on `df_no_outliers`. felm() formula
# parts: covariates | fixed effects | instruments | cluster variables; no
# fixed effects or instruments, standard errors clustered by party, issue
# and legislative period (gp).

# Model 1: issue ownership and its interaction with government participation
reg4 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | 0 | 0 | party + issue + gp,
             data = df_no_outliers)

# Model 2: manifesto issue variation and its interaction with government
reg5 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | 0 | 0 | party + issue + gp,
             data = df_no_outliers)

# Model 3: both predictors with both interactions
reg6 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | 0 | 0 | party + issue + gp,
             data = df_no_outliers)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter.
stargazer(list(reg4, reg5, reg6),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          notes = "Standard errors clustered by party, issue and legislative period.")

```

## Appendix I

In the regression models reported in Appendix I, *Speech Issue Variation* and *Manifesto Issue Variation* are calculated with the mean instead of the median.

```{r Appendix I, warning=FALSE}

# Load the alternative data set, which provides the mean-based variables
# `speech_diff_to_mean` and `manifesto_diff_to_mean` used below.
df_alternative <- readRDS("Analysis_Data_Alternative.RDS")

# Same model structure as Table 2 with the mean-based variables. felm()
# formula parts: covariates | fixed effects | instruments | cluster variables;
# no fixed effects or instruments, standard errors clustered by party, issue
# and legislative period (gp).

# Model 1: issue ownership and its interaction with government participation
reg7 <- felm(speech_diff_to_mean ~ IO +
               IO*government
             | 0 | 0 | party + issue + gp,
             data = df_alternative)

# Model 2: manifesto issue variation and its interaction with government
reg8 <- felm(speech_diff_to_mean ~ manifesto_diff_to_mean +
               manifesto_diff_to_mean*government
             | 0 | 0 | party + issue + gp,
             data = df_alternative)

# Model 3: both predictors with both interactions
reg9 <- felm(speech_diff_to_mean ~ IO + manifesto_diff_to_mean +
               IO*government +
               manifesto_diff_to_mean*government
             | 0 | 0 | party + issue + gp,
             data = df_alternative)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter.
stargazer(list(reg7, reg8, reg9),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          notes = "Standard errors clustered by party, issue and legislative period.")

```

## Appendix J

Appendix J provides alternative specifications of the regression model reported in Table 2 of the main paper as robustness checks. The models reported here use alternative combinations of clustered standard errors and fixed effects to account for the clustered data structure. The main findings remain largely stable across the different specifications.

- The first model is a simple OLS regression model without fixed effects or clustered standard errors.
- The second model uses clustered standard errors for *party*.
- The third model uses clustered standard errors for *issue*. 
- The fourth model uses clustered standard errors for *legislative period*.
- The fifth model uses fixed effects for *issue* and *legislative period* and clustered standard errors by *party*.
- The sixth model uses fixed effects for *party* and *legislative period* and clustered standard errors by *issue*.
- The seventh model uses fixed effects for *party* and *issue* and clustered standard errors by *legislative period*.
- The eighth model uses fixed effects for all three variables *issue*, *party* and *legislative period*, but no clustered standard errors.
- The ninth model uses fixed effects and clustered standard errors for all three variables *issue*, *party* and *legislative period*.

```{r Appendix J, warning=FALSE}

# Alternative 1: plain OLS via lm() — no fixed effects and no clustered
# standard errors.

# Row labels for the regression table.
# NOTE(review): labels are matched to table rows by position — confirm the
# printed row order agrees with this label order.
labels_alt1 <- c("Issue Ownership",
                 "Manifesto Issue Variation",
                 "Government participation",
                 "Issue Ownership:Government participation",
                 "Manifesto Issue Variation:Government participation")

# Issue ownership model
reg10 <- lm(
  speech_diff_to_median ~ IO + IO*government,
  data = df
)

# Manifesto issue variation model
reg11 <- lm(
  speech_diff_to_median ~ manifesto_diff_to_median +
    manifesto_diff_to_median*government,
  data = df
)

# Combined model
reg12 <- lm(
  speech_diff_to_median ~ IO + manifesto_diff_to_median +
    IO*government +
    manifesto_diff_to_median*government,
  data = df
)

stargazer(list(reg10, reg11, reg12),
          type = "text",
          dep.var.labels = "Speech Issue Variation",
          covariate.labels = labels_alt1)

# Alternative 2: no fixed effects, standard errors clustered by party.
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg13 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | 0 | 0 | party,
             data = df)

reg14 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | 0 | 0 | party,
             data = df)

reg15 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | 0 | 0 | party,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter.
stargazer(list(reg13, reg14, reg15),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          notes = "Standard errors clustered by party.")

# Alternative 3: no fixed effects, standard errors clustered by issue.
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg16 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | 0 | 0 | issue,
             data = df)

reg17 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | 0 | 0 | issue,
             data = df)

reg18 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | 0 | 0 | issue,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter.
stargazer(list(reg16, reg17, reg18),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          notes = "Standard errors clustered by issue.")

# Alternative 4: no fixed effects, standard errors clustered by legislative
# period (gp).
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg19 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | 0 | 0 | gp,
             data = df)

reg20 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | 0 | 0 | gp,
             data = df)

reg21 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | 0 | 0 | gp,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter.
stargazer(list(reg19, reg20, reg21),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          notes = "Standard errors clustered by legislative period.")


# Alternative 5: fixed effects for issue and legislative period (gp),
# standard errors clustered by party.
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg22 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | issue + gp | 0 | party,
             data = df)

reg23 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | issue + gp | 0 | party,
             data = df)

reg24 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | issue + gp | 0 | party,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter. add.lines documents the fixed
# effects, which do not appear as coefficient rows.
stargazer(list(reg22, reg23, reg24),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          add.lines = list(c('Issue FEs', 'Yes', 'Yes', 'Yes'),
                           c('Legislative period FEs', 'Yes', 'Yes', 'Yes')),
          notes = "Standard errors clustered by party.")

# Alternative 6: fixed effects for party and legislative period (gp),
# standard errors clustered by issue.
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg25 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | party + gp | 0 | issue,
             data = df)

reg26 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | party + gp | 0 | issue,
             data = df)

reg27 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | party + gp | 0 | issue,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter. add.lines documents the fixed
# effects, which do not appear as coefficient rows.
stargazer(list(reg25, reg26, reg27),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          add.lines = list(c('Party FEs', 'Yes', 'Yes', 'Yes'),
                           c('Legislative period FEs', 'Yes', 'Yes', 'Yes')),
          notes = "Standard errors clustered by issue.")

# Alternative 7: fixed effects for party and issue, standard errors clustered
# by legislative period (gp).
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg28 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | party + issue | 0 | gp,
             data = df)

reg29 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | party + issue | 0 | gp,
             data = df)

reg30 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | party + issue | 0 | gp,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter. add.lines documents the fixed
# effects, which do not appear as coefficient rows.
stargazer(list(reg28, reg29, reg30),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          add.lines = list(c('Party FEs', 'Yes', 'Yes', 'Yes'),
                           c('Issue FEs', 'Yes', 'Yes', 'Yes')),
          notes = "Standard errors clustered by legislative period.")


# Alternative 8: fixed effects for party, issue and legislative period (gp),
# without clustered standard errors.
# felm() formula parts: covariates | fixed effects | instruments | clusters.

# Row labels for the regression table.
# NOTE(review): labels are matched to table rows by position — confirm the
# printed row order agrees with this label order.
labels_alt8 <- c("Issue Ownership",
                 "Manifesto Issue Variation",
                 "Government participation",
                 "Issue Ownership:Government participation",
                 "Manifesto Issue Variation:Government participation")

# Extra table rows documenting the fixed effects of the three models.
fe_rows_alt8 <- list(
  c("Party FEs", "Yes", "Yes", "Yes"),
  c("Issue FEs", "Yes", "Yes", "Yes"),
  c("Legislative period FEs", "Yes", "Yes", "Yes")
)

# Issue ownership model
reg31 <- felm(
  speech_diff_to_median ~ IO + IO*government
  | party + issue + gp | 0 | 0,
  data = df
)

# Manifesto issue variation model
reg32 <- felm(
  speech_diff_to_median ~ manifesto_diff_to_median +
    manifesto_diff_to_median*government
  | party + issue + gp | 0 | 0,
  data = df
)

# Combined model
reg33 <- felm(
  speech_diff_to_median ~ IO + manifesto_diff_to_median +
    IO*government +
    manifesto_diff_to_median*government
  | party + issue + gp | 0 | 0,
  data = df
)

stargazer(list(reg31, reg32, reg33),
          type = "text",
          dep.var.labels = "Speech Issue Variation",
          covariate.labels = labels_alt8,
          add.lines = fe_rows_alt8)

# Alternative 9: fixed effects and clustered standard errors for all three
# variables: party, issue and legislative period (gp).
# felm() formula parts: covariates | fixed effects | instruments | clusters.
reg34 <- felm(speech_diff_to_median ~ IO +
               IO*government
             | party + issue + gp | 0 | party + issue + gp,
             data = df)

reg35 <- felm(speech_diff_to_median ~ manifesto_diff_to_median +
               manifesto_diff_to_median*government
             | party + issue + gp | 0 | party + issue + gp,
             data = df)

reg36 <- felm(speech_diff_to_median ~ IO + manifesto_diff_to_median +
               IO*government +
               manifesto_diff_to_median*government
             | party + issue + gp | 0 | party + issue + gp,
             data = df)

# NOTE(review): covariate.labels are matched to table rows by position —
# confirm the printed row order agrees with this label order.
# `notes` is stargazer's argument for table notes; the previous `note = `
# spelling is not a stargazer parameter. add.lines documents the fixed
# effects, which do not appear as coefficient rows.
stargazer(list(reg34, reg35, reg36),
          covariate.labels = c("Issue Ownership",
                               "Manifesto Issue Variation",
                               "Government participation",
                               "Issue Ownership:Government participation",
                               "Manifesto Issue Variation:Government participation"),
          dep.var.labels = "Speech Issue Variation", type = "text",
          add.lines = list(c('Party FEs', 'Yes', 'Yes', 'Yes'),
                           c('Issue FEs', 'Yes', 'Yes', 'Yes'),
                           c('Legislative period FEs', 'Yes', 'Yes', 'Yes')),
          notes = "Standard errors clustered by issue, party and legislative period.")


```
